home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The Arsenal Files 4
/
The Arsenal Files 4 (Arsenal Computer).ISO
/
casm
/
au116-as.exe
/
DUPES.CPP
< prev
next >
Wrap
C/C++ Source or Header
|
1994-11-18
|
12KB
|
456 lines
// DUPES.CPP
// Dave Harris
// Compiled using Borland C++ ver 3.1
// 03-03-94
// (the original header carried an ASCII-art banner that appears to read "1.16")
////////////////////////////////////////////////////////////////////////
#include "au.hpp"
#define PROGRAM "DUPES" // Name of module
/* One record of the data file.  Records are read and written verbatim by
   read_data_struct()/write_data_struct(), so the field order and sizes
   below define the on-disk record layout. */
typedef struct
{
char arcFile[80]; /* path of the file (or of the archive holding it) */
char filename[FILE_SIZE]; /* name inside the archive; "" when the record is the file itself */
unsigned long crc; /* CRC used, together with size, to detect duplicates */
long size; /* size compared against the threshold and between records */
long next; /* 1-based number of the next record in the same hash chain, 0 = end of chain */
unsigned short used; /* set to DUPES_INFO.used by print_results() once the record has been listed */
} FILE_CONT;
/* Head and tail of one hash chain, expressed as 1-based record numbers
   in the data file.  The whole table is stored at the start of the data
   file by write_hash_table(). */
typedef struct
{
long first; /* first record of the chain; 0 means the slot is empty */
long last; /* last record appended to the chain */
} HASH;
/* Number of hash slots; add_to_list() picks a slot with crc % HASH_SLOTS. */
#define HASH_SLOTS 1024
/*********************************************************************/
/* Per-run state of the DUPES module; main_dupes() allocates one and
   stores it in au->info. */
typedef struct
{
char do_arcs; // archive mode set by -A; dupes() compares it against OFF and ONLY
HASH *hash; // in-memory hash table of HASH_SLOTS entries
long last_record; // number of the last record in the data file
long number_inside_processed; // count of archive members seen by dupes_one()
long size_thresh; // add_to_list() ignores files smaller than this (-T)
char data_file_name[FLENGTH]; // data file path (-D, or a default built in main_dupes())
HANDLE data_handle; // handle of the data file
char log_file_name[FLENGTH]; // log file path (-L); empty string disables logging
HANDLE log_handle; // handle of the log file
BYTE keep_data_file; // TRUE when -D was given; otherwise the data file is unlinked at exit
BYTE use_32; // TRUE: crc32() for whole files, FALSE: crc16()
BYTE verbose; // TRUE: list each processed file (-V)
unsigned short used; // Marker value print_results() stamps on records listed in this run
} DUPES_INFO;
/* Size in bytes of the hash table stored at the start of the data file. */
#define HASH_TABLE_SIZE (sizeof(HASH) * HASH_SLOTS)
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
/*
 * dupes_log
 *
 * Appends a printf-style formatted message to the log file.
 *
 *   au      - context; au->info must point at the DUPES_INFO
 *   format  - printf-style format string
 *   ...     - arguments consumed by format
 *
 * When no log file name is configured the function returns at once,
 * before any formatting is done, so the (unbounded) vsprintf is never
 * run for output that would be thrown away.
 *
 * NOTE(review): the formatted text must still fit in the 200 byte
 * buffer; vsprintf itself performs no bounds checking.
 */
static void dupes_log(AU *au, char *format, ...)
{
    DUPES_INFO *in = (DUPES_INFO *)au->info;
    va_list plist;
    char string[200];

    /* Logging disabled: nothing to format, nothing to write. */
    if (in->log_file_name[0] == '\0')
        return;

    va_start(plist, format);
    vsprintf(string, format, plist);
    va_end(plist);

    in->log_handle.write_raw(string, strlen(string));
}
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
/*
 * read_hash_table
 *
 * Loads the header of an existing data file: the hash table followed
 * by the 2-byte `used` marker.  The marker is then incremented so this
 * run works with a fresh value, and last_record is derived from the
 * file length.  Prints "Read Error" and exits with status 1 if either
 * read comes up short.
 */
static void read_hash_table(AU *au)
{
    DUPES_INFO *info = (DUPES_INFO *)au->info;
    long file_bytes;

    info->data_handle.seek_raw(0L, SEEK_SET);

    /* The second read is attempted only when the first one succeeded. */
    if (info->data_handle.read_raw(info->hash, HASH_TABLE_SIZE) != HASH_TABLE_SIZE
        || info->data_handle.read_raw(&info->used, 2) != 2)
    {
        au_printf_error(au, "\nRead Error");
        exit(1);
    }

    info->used++;

    /* Everything after the header is an array of FILE_CONT records. */
    file_bytes = info->data_handle.file_length();
    info->last_record = (file_bytes - HASH_TABLE_SIZE - 2)/sizeof(FILE_CONT);
}
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
/*
 * write_hash_table
 *
 * Stores the header of the data file: the in-memory hash table
 * followed by the 2-byte `used` marker, both written from offset 0.
 * Prints "Write Error" and exits with status 1 if either write comes
 * up short.
 */
static void write_hash_table(AU *au)
{
    DUPES_INFO *in = (DUPES_INFO *)au->info;

    in->data_handle.seek_raw(0L, SEEK_SET);

    if (in->data_handle.write_raw(in->hash, HASH_TABLE_SIZE) != HASH_TABLE_SIZE)
    {
        au_printf_error(au, "\nWrite Error");
        exit(1);
    }

    /* This is a write as well, so a failure is reported as a write error
       (it used to be reported as "Read Error"). */
    if (in->data_handle.write_raw(&in->used, 2) != 2)
    {
        au_printf_error(au, "\nWrite Error");
        exit(1);
    }
}
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
/*
 * read_data_struct
 *
 * Reads one FILE_CONT record from the data file.
 *
 *   au        - context; au->info must point at the DUPES_INFO
 *   contents  - receives the record
 *   record    - 1-based record number
 *
 * Records follow the hash table and the 2-byte `used` marker.  A short
 * read would leave *contents partly uninitialised, so it is treated the
 * same way the other I/O helpers treat failures: "Read Error" is
 * printed and the program exits with status 1.
 */
static void read_data_struct(AU *au, FILE_CONT *contents, long record)
{
    DUPES_INFO *in = (DUPES_INFO *)au->info;

    in->data_handle.seek_raw(HASH_TABLE_SIZE + 2 + sizeof(FILE_CONT)*(record-1), SEEK_SET);
    if (in->data_handle.read_raw(contents, sizeof(FILE_CONT)) != sizeof(FILE_CONT))
    {
        au_printf_error(au, "\nRead Error");
        exit(1);
    }
}
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
/*
 * write_data_struct
 *
 * Writes one FILE_CONT record to the data file at the position of the
 * given 1-based record number (records follow the hash table and the
 * 2-byte `used` marker).  Prints "Write Error" and exits with status 1
 * when the record could not be written completely.
 */
static void write_data_struct(AU *au, FILE_CONT *contents,long record)
{
    DUPES_INFO *info = (DUPES_INFO *)au->info;

    info->data_handle.seek_raw(HASH_TABLE_SIZE + 2 + sizeof(FILE_CONT)*(record-1), SEEK_SET);

    /* Normal case first: the whole record went out. */
    if (info->data_handle.write_raw(contents, sizeof(FILE_CONT)) == sizeof(FILE_CONT))
        return;

    au_printf_error(au, "\nWrite Error");
    exit(1);
}
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
/*
 * add_to_list
 *
 * Appends a record for one file to the data file and links it onto the
 * hash chain selected by crc % HASH_SLOTS.
 *
 *   au        - context; au->info must point at the DUPES_INFO
 *   arcFile   - path of the file, or of the archive that contains it
 *   filename  - member name inside the archive, "" for the file itself
 *   crc       - CRC of the contents
 *   size      - size of the contents
 *
 * Returns FALSE (nothing stored) when size is below the configured
 * threshold, TRUE once the record has been written.
 *
 * The record is zero-filled before use so that `used` starts at 0
 * (print_results() compares it against in->used to decide whether the
 * record was already listed) and so that no stack garbage reaches the
 * data file.  Both names are copied with a length limit; over-long
 * names are truncated in the record only, the caller's buffers are not
 * modified.
 */
static BYTE add_to_list(AU *au, char *arcFile, char *filename, unsigned long crc, long size)
{
    FILE_CONT temp;
    unsigned int hash_code;
    DUPES_INFO *in = (DUPES_INFO *)au->info;

    if (size < in->size_thresh)
        return FALSE;

    /* Zero fill gives next = 0, used = 0 and NUL terminators for the
       bounded copies below. */
    memset(&temp, 0, sizeof(temp));
    strncpy(temp.arcFile, arcFile, sizeof(temp.arcFile) - 1);
    strncpy(temp.filename, filename, sizeof(temp.filename) - 1);
    temp.crc = crc;
    temp.size = size;

    hash_code = (unsigned int)(crc % HASH_SLOTS);
    in->last_record++;

    if (in->hash[hash_code].first == 0)
    {
        /* Empty slot: the new record is both head and tail. */
        in->hash[hash_code].first = in->hash[hash_code].last = in->last_record;
    }
    else /* thread the previous */
    {
        FILE_CONT last;

        read_data_struct(au, &last, in->hash[hash_code].last);
        last.next = in->last_record;
        write_data_struct(au, &last, in->hash[hash_code].last);
        in->hash[hash_code].last = in->last_record;
    }

    write_data_struct(au, &temp, in->last_record);
    return TRUE;
}
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
/*
 * dupes_one
 *
 * Walks the member records of an opened archive and hands each one to
 * add_to_list().
 *
 *   au          - context; au->info must point at the DUPES_INFO
 *   file_name   - name of the archive being read
 *   arc_handle  - initialised archive handle
 *
 * Returns TRUE when at least one member was stored.  A get_record()
 * result of -2 puts the archive on the bad list and returns FALSE; a
 * result of -3 returns FALSE without further action.
 */
static BYTE dupes_one(AU *au, char *file_name, ARC_HANDLE *arc_handle)
{
    DUPES_INFO *info = (DUPES_INFO *)au->info;
    ARC_RECORD member;
    char full_path[FLENGTH];
    BYTE any_added = FALSE;
    int status;

    while ((status = arc_handle->get_record(au, &member)) != EOF)
    {
        if (status == -2)
        {
            add_to_bad_list(au, au->source_directory, file_name);
            return FALSE;
        }
        if (status == -3)
            return FALSE;

        build_fname(full_path, au->source_directory, file_name);
        if (add_to_list(au, full_path, member.name, member.crc, member.unpacked_size))
            any_added = TRUE;

        /* Counted whether or not the member passed the size threshold. */
        info->number_inside_processed++;
    }

    return any_added;
}
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
/*
 * dupes
 *
 * Per-file callback passed to process_files().  Depending on the
 * archive mode it records the members of the file (when it is a
 * supported archive), the file itself, or both.
 *
 *   au         - context; au->info must point at the DUPES_INFO
 *   file_name  - name of the file to process
 *
 * Always returns 0.  au->number_processed is incremented when at least
 * one record was stored for the file.
 *
 * The file's size comes from findfirst(); if that lookup fails the
 * ffblk contents are not valid, so the file itself is skipped with an
 * error message instead of being recorded with an arbitrary size.
 */
static int dupes(AU *au, char *file_name)
{
    ARC_HANDLE arc_handle;
    char string[FLENGTH];
    struct ffblk ffblk;
    DUPES_INFO *in = (DUPES_INFO *)au->info;
    BYTE processed = FALSE;

    check_for_key();

    if (in->do_arcs != OFF)
    {
        arc_handle.init(au, file_name);
        if (arc_handle.type > 0 && au->package[arc_handle.type].crc != 0)
        {
            processed = dupes_one(au, file_name, &arc_handle);
        }
        arc_handle.deinit(au);
    }

    if (in->do_arcs != ONLY)
    {
        /* Place the archive itself in the list */
        build_fname(string, au->source_directory, file_name);
        if (findfirst(file_name, &ffblk, 0) != 0)
        {
            /* No directory entry: ff_fsize would be garbage. */
            au_printf_error(au, "\nUnable to find %s", file_name);
        }
        else if (in->use_32)
        {
            processed |=
                add_to_list(au, string, "", crc32(au, file_name), ffblk.ff_fsize);
        }
        else
        {
            processed |=
                add_to_list(au, string, "", crc16(au, file_name), ffblk.ff_fsize);
        }
    }

    if (processed)
    {
        au->number_processed++;
        if (in->verbose)
            au_printf(au, "@?6Processed @?1%s@?H\n", file_name);
    }
    return 0;
}
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
/*
 * parse_comm_line
 *
 * Option callback handed to generic_parse_comm_line().
 *
 *   au        - context; au->info must point at the DUPES_INFO
 *   option    - option letter
 *   cur_argv  - text following the option letter (parameter options)
 *   type      - PARSE_SINGLE_OPTION or PARSE_PARAM_OPTION
 *
 * Single options: '3' selects 32-bit CRCs, '1' selects 16-bit CRCs,
 * 'V' turns verbose output on; anything else returns FALSE.
 * Parameter options: W, A, D, L, T and '?' (prints the help text and
 * exits); unknown letters are passed to au_invalid_option().  Returns
 * TRUE for every parameter option, FALSE for any other parse type.
 */
static BYTE parse_comm_line(AU *au, char option, char *cur_argv,
                            PARSE_TYPE type)
{
    DUPES_INFO *info = (DUPES_INFO *)au->info;

    if (type == PARSE_SINGLE_OPTION)
    {
        switch (option)
        {
            case '3':
                info->use_32 = TRUE;
                return TRUE;
            case '1':
                info->use_32 = FALSE;
                return TRUE;
            case 'V':
                info->verbose = TRUE;
                return TRUE;
        }
        return FALSE;
    }

    if (type != PARSE_PARAM_OPTION)
        return FALSE;

    switch (option)
    {
        case 'W':
            strcpy(au->dest_directory, cur_argv);
            break;

        case 'A': /* Smart mode on/off/always */
            info->do_arcs = get_value(au, OFF | ON | ONLY);
            break;

        case 'D':
            /* An explicit data file is kept after the run. */
            strcpy(info->data_file_name, cur_argv);
            info->keep_data_file = TRUE;
            break;

        case 'L':
            strcpy(info->log_file_name, cur_argv);
            break;

        case 'T':
            info->size_thresh = atol(cur_argv);
            break;

        case '?':
            au_standard_opt_header(au, "DUpes",
                "@?3-16@?H use 16 bit CRCs for non-arc files\n"
                "@?3-32@?H use 32 bit CRCs for non-arc files (default)\n"
                "@?3-A@?Hon|off|only process Archive files\n"
                "@?3-D@?H<file> Data file. If unspecified, a temp file is used and deleted\n"
                "@?3-L@?H<file> Log file. Contains listing of duplicates\n"
                "@?3-T@?Hn size Threshold\n"
                "@?3-V@?H Verbose listing of files as processed\n"
                "@?3-W@?H<path> Work directory\n");
            exit(0);
            /* no break here, as in the original layout of this switch */

        default:
            au_invalid_option(au, PROGRAM, option);
    }
    return TRUE;
}
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
/*
 * print_results
 *
 * Lists groups of duplicate files on screen and, through dupes_log(),
 * in the log file.  Every hash chain is walked; for each record that
 * has not been listed yet, the rest of its chain is searched for
 * records with the same crc and size.  The first match prints the
 * reference record as a heading, every match is printed below it and
 * is stamped with in->used in the data file so that it is not listed
 * again under another heading.
 */
static void print_results(AU *au)
{
FILE_CONT temp,temp2;
int first = TRUE; /* TRUE until the heading for the current reference record is printed */
long rec; /* record number currently held in temp2 */
DUPES_INFO *in = (DUPES_INFO *)au->info;
au_printf_c(au, 15, "\nDuplicate Files:\n\n");
for (int i=0; i<HASH_SLOTS; i++)
{
if (in->hash[i].first != 0)
{
/* temp is the reference record; the chain's last record is never used
   as a reference because nothing follows it. */
read_data_struct(au, &temp, in->hash[i].first);
while (temp.next != 0)
{
/* A link pointing past the chain's tail is treated as corruption;
   the rest of this chain is abandoned. */
if (temp.next > in->hash[i].last)
{
au_printf_error(au, "Corrupt data file in hash thread %d\n", i);
break;
}
/* Skip reference records already stamped as listed in this run. */
if (temp.used != in->used)
{
rec = temp.next;
read_data_struct(au, &temp2, rec);
/* Compare temp against every later record of the chain. */
for(EVER)
{
/* This break leaves only the inner scan, not the walk of the chain. */
if (temp2.next > in->hash[i].last)
{
au_printf_error(au, "Corrupt data file in hash thread %d\n", i);
break;
}
if (temp.crc == temp2.crc && temp.size == temp2.size &&
temp2.used != in->used)
{
/* First match for this reference record: print the reference itself. */
if (first)
{
/* A non-empty filename means the record is a member inside arcFile. */
if (temp.filename[0] != '\0')
{
au_printf(au, "(@?B%s@?H inside @?1%s@?H)", temp.filename, temp.arcFile);
dupes_log(au, "(%s inside %s)", temp.filename, temp.arcFile);
}
else
{
au_printf(au, "@?1%s@?H", temp.arcFile);
dupes_log(au, "%s", temp.arcFile);
}
au_printf(au, "\n");
dupes_log(au, "\n");
first = FALSE;
}
/* The duplicate is printed after a leading blank, below the heading. */
au_printf(au, " ");
dupes_log(au, " ");
if (temp2.filename[0] != '\0')
{
au_printf(au, "(@?C%s@?H inside @?2%s@?H)", temp2.filename, temp2.arcFile);
dupes_log(au, "(%s inside %s)", temp2.filename, temp2.arcFile);
}
else
{
au_printf(au, "@?2%s@?H", temp2.arcFile);
dupes_log(au, "%s", temp2.arcFile);
}
au_printf(au, "\n");
dupes_log(au, "\n");
/* Stamp the duplicate on disk; it is re-read from the file when the
   outer walk reaches it, so it will be skipped as a reference. */
temp2.used = in->used;
write_data_struct(au, &temp2, rec);
}
if (temp2.next == 0)
break;
else
{
rec = temp2.next;
read_data_struct(au, &temp2, rec);
}
}
}
/* Advance the reference record and re-arm the heading flag. */
read_data_struct(au, &temp, temp.next);
first = TRUE;
}
}
}
}
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
static void end_program(void)
{
DUPES_INFO *in = (DUPES_INFO *)glob_au->info;
if (in->data_handle.is_open())
{
write_hash_table(glob_au);
in->data_handle.close();
if (!in->keep_data_file)
unlink(in->data_file_name);
}
if (in->log_file_name[0] != '\0')
{
in->log_handle.close();
}
if (in->hash != NULL)
free(in->hash);
return;
}
/*░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░*/
/*
 * main_dupes
 *
 * Entry point of the DUPES module.
 *
 *   au          - context; receives the module state in au->info
 *   argc, argv  - command line handed on to generic_parse_comm_line()
 *
 * Sets up the defaults, reads the configuration and the command line,
 * registers end_program() as exit handler, opens an existing data file
 * (only when one was named with -D and it exists) or creates a new
 * one, opens the log file if requested, processes all files through
 * dupes(), prints the duplicates and finally a short summary unless
 * au->no_extra is set.  Always returns 0.
 */
int main_dupes(AU *au, int argc, char *argv[])
{
    DUPES_INFO *info = new DUPES_INFO;
    long data_bytes;

    /* Defaults, applied on top of an all-zero state. */
    memset(info, '\0', sizeof(DUPES_INFO));
    au->info = info;
    info->do_arcs=ON;
    info->size_thresh=1;
    info->use_32 = TRUE;
    info->used = 1;

    ReadGlobalCFGInfo(au, au->cfg_file, PROGRAM, NULL);
    generic_parse_comm_line(au, argc, argv, parse_comm_line);
    atexit(end_program);

    info->hash = (HASH *)au_calloc(au, sizeof(HASH), HASH_SLOTS);

    if (info->data_file_name[0] == '\0')
        build_fname(info->data_file_name, au->dest_directory, "dupes94.dat");

    if (!info->keep_data_file || access(info->data_file_name, 0x00) != 0)
    {
        /* No reusable data file: start a new one, created in binary mode. */
        _fmode = O_BINARY;
        info->data_handle.create(info->data_file_name, S_IREAD|S_IWRITE);
        _fmode = O_TEXT;
    }
    else
    {
        /* Continue with the records of an earlier run. */
        info->data_handle.open(au, info->data_file_name, O_BINARY | O_RDWR);
        read_hash_table(au);
    }

    if (info->log_file_name[0] != '\0')
        info->log_handle.open(au, info->log_file_name, O_CREAT|O_WRONLY|O_TEXT|O_APPEND);

    process_files(au, dupes);
    print_results(au);

    data_bytes = info->data_handle.file_length();
    if (!au->no_extra)
    {
        au_printf_c(au, 15, "\nFiles Processed = %d\n", au->number_processed);
        au_printf_c(au, 15, "Files Inside Archives Processed = %ld\n",
                    info->number_inside_processed);
        au_printf_c(au, 15, "Disk space required to hold results = %ld bytes\n", data_bytes);
    }
    return 0;
}